This is the online supplementary to the paper “Traditional Chinese Medicine: A Bayesian network model of public awareness, usage determinants, and perception of scientific support in Austria” by Eigenschink, Bellach, Leonard, Dablander, Maier, Dablander, and Sitte. It includes the code and data to fully reproduce the results.
We load the data, which holds 1382 participants.
library('dplyr')
library('readr')
library('tidyr')
library('kableExtra')
dat_raw <- read_delim('Data/dat.csv', delim = ';', skip = 1, na = c('', 'NA', -9))
dat1 <- dat_raw %>%
mutate(
malus = as.numeric(gsub(',', '.', malus)),
age = as.numeric(age),
age = ifelse(age == 1994, 2020 - 1994, age),
age = ifelse(age == '60 Jahre', 60, age),
age = ifelse(age == 'Sechzig', 60, age),
age = ifelse(age == '50 plus', 50, age),
)
dim(dat1)
## [1] 1382 41
We select only the relevant variables.
relevant_variables <- c(
'obtained', 'gender', 'age', 'education', 'employement',
'income', 'disease', 'tcm_known', 'tcm_use', 'tcm_frequency', 'tcm_science', 'tcm_trust',
'med_money', 'tcm_money', 'tcm_moneyrel', 'globuli_use', 'globuli_pass', 'vax_use',
'vax_fresh', 'needle_use', 'needle_frequency', 'type'
)
dat_relevant <- select(dat1, all_of(relevant_variables))
There are 9 participants who had NAs for all relevant variables. We remove these participants.
index_all_na <- which(apply(select(dat_relevant, -type), 1, function(x) all(is.na(x))))
length(index_all_na)
## [1] 9
dat2 <- dat1[-index_all_na, ]
We also exclude those that indicated NA in some of the demographic variables, including education, employement, and income. This excluded 44 participants.
demographics <- c('gender', 'age', 'education', 'employement', 'income')
dat3 <- dat2 %>%
drop_na(demographics)
nrow(dat2) - nrow(dat3)
## [1] 44
For simplicity, we remove anybody who indicates “other” for gender (4 observations). Similarly, we remove somebody who indicated “-” as his/her age. This removes 4 participants.
dat4 <- filter(dat3, age != '-', gender != 3)
nrow(dat3) - nrow(dat4)
## [1] 4
Finally, we remove participants that have answered the online survey unreasonably quickly. We do this by removing participants with a malus score of 2 or higher (or those that had NA). This excludes 7 participants.
dat5 <- filter(dat4, !is.na(malus), !(type == 2 & malus > 2))
nrow(dat4) - nrow(dat5)
## [1] 7
We remove those particicpants that have type = ‘street’ yet have no obtained = 1.
dat <- dat5 %>% filter(!(type == 1 & obtained != 1))
nrow(dat5) - nrow(dat)
## [1] 11
In total, we have excluded 75 participants.
nrow(dat_raw) - nrow(dat)
## [1] 75
In this section, we will visualize and describe the sample. Here are some descriptive statistics we report in the main text.
# Get age and education statistics
dat %>%
group_by(gender) %>%
summarize(
n = n(),
age_m = mean(age),
age_sd = sd(age),
edu_norm = mean(education >= 6)
)
## # A tibble: 2 x 5
## gender n age_m age_sd edu_norm
## <dbl> <int> <dbl> <dbl> <dbl>
## 1 1 901 41.7 15.6 0.422
## 2 2 406 40.5 16.9 0.394
# Get income statistics
dat %>%
filter(income != 12) %>%
group_by(gender) %>%
summarize(
n = n(),
inc_norm = mean(income >= 7)
)
## # A tibble: 2 x 3
## gender n inc_norm
## <dbl> <int> <dbl>
## 1 1 760 0.321
## 2 2 343 0.472
# Get disease statistics
dat %>%
filter(!is.na(disease)) %>%
group_by(gender) %>%
summarize(
n = n(),
disease = mean(2 - disease)
)
## # A tibble: 2 x 3
## gender n disease
## <dbl> <int> <dbl>
## 1 1 900 0.453
## 2 2 406 0.387
# 'Gesamtzeit Fragebogen' according to the codebook
dat %>%
group_by(type) %>%
summarize(
time = median(time_total)
)
## # A tibble: 2 x 2
## type time
## <dbl> <dbl>
## 1 1 189
## 2 2 212
We first visualise the age, education, and income distribution across gender.
library('ggplot2')
library('gridExtra')
library('RColorBrewer')
# Easy to change colours
cols <- brewer.pal(3, 'Set1')[-2]
cols <- rev(ggsci::pal_jco(alpha = 0.60)(2))
custom_theme <- theme_minimal() +
theme(
legend.position = 'top',
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.spacing.x = unit(1, 'lines'),
panel.spacing.y = unit(1, 'lines'),
strip.text.x = element_text(size = 14),
strip.text.y = element_text(size = 14),
legend.text = element_text(size = 10),
legend.title = element_text(size = 14),
axis.text = element_text(size = 10),
axis.title = element_text(size = 12),
plot.title = element_text(hjust = 0.5, size = 18),
plot.subtitle = element_text(hjust = 0.50, size = 14),
)
remove_grid <- theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank()
)
education_labels <- c(
'Elementary school',
'Secondary school (GSCE)',
'Apprenticeship',
'Further education (A-level)',
'Special certificate*',
'Bachelor\'s',
'Master\'s',
'PhD'
)
income_labels <- c(
'No income', '< 250', '250 - 500', '500 - 1000',
'1000 - 1500', '1500 - 2000', '2000 - 2500',
'2500 - 3000', '3000 - 3500', '3500 - 4000',
'> 4000', 'No answer'
)
income_labels <- c(
'No income', '< 250€', '250€ - < 500€', '500€ - < 1000€',
'1000€ - < 1500€', '1500€ - < 2000€', '2000€ - < 2500€',
'2500€ - < 3000€', '3000€ - < 3500€', '3500€ - < 4000€',
'>= 4000€', 'No answer'
)
desc <- dat %>%
select(gender, age, education, income) %>%
pivot_longer(-gender, names_to = 'variable') %>%
mutate(gender = ifelse(gender == 1, 'Female (n = 901)', 'Male (n = 406)'))
desc_age <- filter(desc, variable == 'age')
male_values <- desc_age[desc_age$gender == 'Male (n = 406)', ]$value
female_values <- desc_age[desc_age$gender != 'Male (n = 406)', ]$value
desc_age$age_rel <- sapply(seq(nrow(desc_age)), function(i) {
row <- desc_age[i, ]
is_male <- row$gender == 'Male (n = 406)'
ifelse(is_male, mean(row$value == male_values), mean(row$value == female_values))
})
p1 <- ggplot(unique(desc_age), aes(x = value, y = age_rel, fill = gender)) +
geom_bar(stat = 'identity', position = position_dodge(1), width = 1) +
scale_fill_manual('', values = cols) +
scale_x_continuous('Age', breaks = scales::pretty_breaks(n = 10)) +
ylab('Relative frequency') +
ggtitle('Age') +
custom_theme +
remove_grid
plot_bar <- function(desc, variable, title, labels, colors = cols) {
d <- desc[desc$variable == variable, ]
male_values <- d[d$gender == 'Male (n = 406)', ]$value
female_values <- d[d$gender != 'Male (n = 406)', ]$value
d$rel <- sapply(seq(nrow(d)), function(i) {
row <- d[i, ]
is_male <- row$gender == 'Male (n = 406)'
ifelse(is_male, mean(row$value == male_values), mean(row$value == female_values))
})
ggplot(unique(d), aes(x = value, y = rel, fill = gender)) +
# geom_bar(position = position_dodge(0.9)) +
geom_bar(stat = 'identity', position = position_dodge(1)) +#, width = 1) +
scale_fill_manual('', values = colors) +
scale_x_continuous('', labels = labels, breaks = seq(length(labels))) +
ylab('Relative frequency') +
ggtitle(title) +
custom_theme +
remove_grid +
coord_flip() +
guides(fill = FALSE)
}
p2 <- plot_bar(desc, 'education', 'Education', education_labels) + ylim(c(0, 0.30))
p3 <- plot_bar(desc, 'income', 'Income', income_labels) + ylim(c(0, 0.18))
# cairo_pdf('Figures/Demographics.pdf', width = 9, height = 8)
grid.arrange(
p1, p2, p3,
layout_matrix = rbind(c(1, 1), c(2, 3))
)
# dev.off()
We now compare our sample to data from Statistik Austria (https://bit.ly/3qgYxsN; “Population 15 years and older by highest level of education completed by sex and age, 2019”, Excel file), which provides data on all Austrians, to assess the representativeness of our sample in terms of age, gender, and education.
Since our age variable is more fine-grained, we aggregate it to match Statistik Austria’s reporting. Note that we did not ask for post-secondary non-tertiary or short tertiary education, which are included in Statistik Austria’s reporting.
library('readxl')
datsa <- read_excel('Data/statistik-austria.xlsx', sheet = 2)
datsa_long <- datsa %>%
pivot_longer(
!(gender | age_group | total),
names_to = 'education_group',
values_to = 'count'
) %>%
select(-total) %>%
mutate(
education_group = factor(education_group, levels = unique(education_group))
)
Here we compare the marginal distribution of age across the sexes in the Austrian general population to the sample distribution.
create_age_groups <- function(dat) {
age_groups <- c('15 - 29', '30 - 49', '50 - 64', '65 - 84', '85 or older')
dat[['age_group']] <- ifelse(
dat[['age']] <= 29, age_groups[1],
ifelse(
dat[['age']] <= 49, age_groups[2],
ifelse(
dat[['age']] <= 64, age_groups[3],
ifelse(
dat[['age']] <= 84, age_groups[4], age_groups[5]
)
)
)
)
dat
}
demo <- select(dat, age, gender, income, education) %>%
mutate(gender = ifelse(gender == 1, 'female', 'male'))
demo <- create_age_groups(demo)
# Statistik Austria data from https://bit.ly/3qgYxsN
gender_age_SA <- datsa %>%
group_by(gender, age_group) %>%
summarize(total = total) %>%
group_by(gender) %>%
mutate(pop_prop = round(total / sum(total), 4)) %>%
select(-total)
gender_age_obs <- demo %>%
group_by(gender, age_group) %>%
summarize(total = n()) %>%
group_by(gender) %>%
mutate(obs_prop = round(total / sum(total), 4)) %>%
select(-total)
gender_age <- full_join(gender_age_SA, gender_age_obs)
gender_age
## # A tibble: 10 x 4
## # Groups: gender [2]
## gender age_group pop_prop obs_prop
## <chr> <chr> <dbl> <dbl>
## 1 female 15 - 29 0.194 0.268
## 2 female 30 - 49 0.307 0.407
## 3 female 50 - 64 0.253 0.236
## 4 female 65 - 84 0.206 0.0855
## 5 female 85 or older 0.0391 0.0033
## 6 male 15 - 29 0.215 0.367
## 7 male 30 - 49 0.325 0.303
## 8 male 50 - 64 0.262 0.224
## 9 male 65 - 84 0.178 0.106
## 10 male 85 or older 0.02 NA
Below we look at education across gender.
create_education_groups <- function(dat) {
education_groups <- c('primary', 'lower_secondary', 'upper_secondary', 'bachelor', 'master', 'doctorate')
dat[['education_group']] <- ifelse(
dat[['education']] == 1, education_groups[1],
ifelse(
dat[['education']] == 2, education_groups[2],
ifelse(
dat[['education']] %in% c(3, 4, 5), education_groups[3],
ifelse(
dat[['education']] == 6, education_groups[4],
ifelse(
dat[['education']] == 7, education_groups[5], education_groups[6]
)
)
)
)
)
dat[['education_group']] <- factor(
dat[['education_group']],
levels = c('primary', 'lower_secondary', 'upper_secondary', 'bachelor', 'master', 'doctorate')
)
dat
}
demo <- create_education_groups(demo)
# Statistik Austria data from https://bit.ly/3qgYxsN
gender_education_SA <- datsa_long %>%
group_by(gender, education_group) %>%
summarize(total = sum(count)) %>%
group_by(gender) %>%
mutate(
pop_prop = round(total / sum(total), 4),
education_group_coarse = ifelse(
education_group %in% c('lower_secondary_1', 'lower_secondary_2'),'lower_secondary',
ifelse(
education_group %in% c('upper_secondary_1', 'upper_secondary_2'),
'upper_secondary', as.character(education_group)
)
)
) %>%
select(-total, -education_group) %>%
group_by(gender, education_group_coarse) %>%
summarize(pop_prop = sum(pop_prop)) %>%
mutate(
education_group = factor(
education_group_coarse,
levels = c(
'primary', 'lower_secondary', 'upper_secondary',
'post_secondary_non_tertiary', 'short_tertiary', 'bachelor', 'master', 'doctorate'
)
)
) %>%
select(gender, education_group, pop_prop) %>%
arrange(gender, education_group)
gender_education_obs <- demo %>%
group_by(gender, education_group) %>%
summarize(total = n()) %>%
group_by(gender) %>%
mutate(obs_prop = round(total / sum(total), 4)) %>%
select(-total)
gender_education <- full_join(gender_education_SA, gender_education_obs)
gender_education
## # A tibble: 16 x 4
## # Groups: gender [2]
## gender education_group pop_prop obs_prop
## <chr> <fct> <dbl> <dbl>
## 1 female primary 0.0762 0.0078
## 2 female lower_secondary 0.220 0.0499
## 3 female upper_secondary 0.427 0.520
## 4 female post_secondary_non_tertiary 0.0336 NA
## 5 female short_tertiary 0.119 NA
## 6 female bachelor 0.0291 0.160
## 7 female master 0.0883 0.212
## 8 female doctorate 0.0072 0.0499
## 9 male primary 0.0372 0.0074
## 10 male lower_secondary 0.169 0.0887
## 11 male upper_secondary 0.510 0.510
## 12 male post_secondary_non_tertiary 0.009 NA
## 13 male short_tertiary 0.148 NA
## 14 male bachelor 0.0202 0.0887
## 15 male master 0.0943 0.229
## 16 male doctorate 0.0129 0.0764
Because we did not assess short tertiary education, and in fact some responses that we now coded as ‘upper_secondary’ education (e.g., certain types of ‘Handelsakademien’, depending on the length of the schooling) are actually short tertiary education, we merge short tertiary education (as well as post-secondary non-tertiary) into upper secondary education in the Statistik Austria data. This makes the survey weighting we do below possible, because it assumes no missing data. We stratify according to the joint distribution of gender, age, and education; we prepare this here.
datsa$lower_secondary <- datsa$lower_secondary_1 + datsa$lower_secondary_2
datsa$upper_secondary <- c(
datsa$upper_secondary_1 + datsa$upper_secondary_2 +
datsa$post_secondary_non_tertiary + datsa$short_tertiary
)
pop_joint <- datsa[, c(1, 2, 4, 14, 15, 11, 12, 13)]
pop_joint <- pop_joint %>%
pivot_longer(
!(gender | age_group),
names_to = 'education_group',
values_to = 'Freq'
) %>%
mutate(
gender = ifelse(gender == 'female', 1, 2),
education_group = factor(
education_group,
levels = c(
'primary', 'lower_secondary', 'upper_secondary', 'bachelor', 'master', 'doctorate')
)
)
# Population joint distribution of gender, age, and education
pop_joint
## # A tibble: 60 x 4
## gender age_group education_group Freq
## <dbl> <chr> <fct> <dbl>
## 1 2 15 - 29 primary 30556
## 2 2 15 - 29 lower_secondary 252453
## 3 2 15 - 29 upper_secondary 453927
## 4 2 15 - 29 bachelor 36496
## 5 2 15 - 29 master 25608
## 6 2 15 - 29 doctorate 451
## 7 2 30 - 49 primary 19481
## 8 2 30 - 49 lower_secondary 177582
## 9 2 30 - 49 upper_secondary 787578
## 10 2 30 - 49 bachelor 34841
## # … with 50 more rows
We first give descriptives for all participants (rather than only those who are familiar with TCM, see below). We remove participants who did not answer the relevant question, which results in 19 exclusions.
dat_var_all <- dat %>%
filter(
!is.na(disease), !is.na(tcm_known), !is.na(tcm_use),
!is.na(globuli_use), !is.na(globuli_pass), !is.na(needle_use)
)
nrow(dat) - nrow(dat_var_all)
## [1] 19
library('papaja')
tab_use_all <- dat_var_all %>%
group_by(gender) %>%
summarize(
# disease_m = mean(disease == 1),
tcm_known_m = mean(tcm_known == 1),
tcm_use_m = mean(tcm_use == 1),
globuli_use_m = mean(globuli_use == 1),
globuli_pass_m = mean(globuli_pass == 1),
needle_use_m = mean(needle_use == 1),
) %>%
mutate_all(
round, 3
) %>%
mutate(
gender = ifelse(gender == 1, 'Female', 'Male')
)
cnames <- c(
'gender', 'Knows TCM', 'Has used TCM', 'Has used Homeopathy',
'Has passed on Homeopathy', 'Has used Accupuncture'
)
colnames(tab_use_all) <- cnames
apa_table(
tab_use_all, caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture use.',
digits = 3
)
| gender | Knows TCM | Has used TCM | Has used Homeopathy | Has passed on Homeopathy | Has used Accupuncture |
|---|---|---|---|---|---|
| Female | 0.955 | 0.628 | 0.682 | 0.439 | 0.611 |
| Male | 0.908 | 0.414 | 0.409 | 0.185 | 0.394 |
Below we give the descriptives weighted according to the Statistik Austria joint population distribution of gender, age, and education.
library('survey')
# Add 'age_group' and 'education_group' columns
dat_var_all <- create_age_groups(dat_var_all)
dat_var_all <- create_education_groups(dat_var_all)
get_prop <- function(formula, weighting) {
tab <- svytable(formula, weighting, round = TRUE)
row <- (tab / rowSums(tab))[, 1]
round(as.numeric(row), 3)
}
# Create the survey design
design <- svydesign(ids = ~1, data = dat_var_all, weights = NULL)
ps <- postStratify(design, ~gender + age_group + education_group, pop_joint, partial = TRUE)
tab_use_all_weighted <- data.frame(
'gender' = c('Female', 'Male'),
'Knows TCM' = get_prop(~gender + tcm_known, ps),
'Has used TCM' = get_prop(~gender + tcm_use, ps),
'Has used Homeopathy' = get_prop(~gender + globuli_use, ps),
'Has passed on Homeopathy' = get_prop(~gender + globuli_pass, ps),
'Has used Accupuncture' = get_prop(~gender + needle_use, ps)
)
colnames(tab_use_all_weighted) <- cnames
apa_table(
tab_use_all_weighted,
caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture using survey weights.',
digits = 3
)
| gender | Knows TCM | Has used TCM | Has used Homeopathy | Has passed on Homeopathy | Has used Accupuncture |
|---|---|---|---|---|---|
| Female | 0.899 | 0.583 | 0.716 | 0.434 | 0.619 |
| Male | 0.906 | 0.512 | 0.451 | 0.187 | 0.486 |
Here we remove all participants who do not know any or either of TCM, homeopathy, or accupuncture. We also remove those who do not know the answer to the relevant questions (e.g., those that do not know how often they use TCM). This removes 232 participants.
Since we have tcm_money, which indicates how much money they have spent on TCM, we do not include tcm_moneyrel, which indicates how much money relative to their other medical spending they have spent on TCM. tcm_moneyrel is also only available for people who took the online survey.
dat_clean <- dat %>%
select(-tcm_moneyrel) %>%
filter(
# Remove people who don't know TCM or who don't know the answer
tcm_known != 2,
!(tcm_use %in% c(3, 4)),
!(tcm_frequency %in% c(6, 7)),
!(tcm_trust %in% c(6, 7)),
!(tcm_science %in% c(6, 7)),
!(tcm_money %in% c(8, 9)),
# Remove people who don't know homeopathy or who don't know the answer
!(globuli_use %in% c(3, 4)),
!(globuli_pass %in% c(3, 4)),
# Remove people who don't know accupuncture or who don't know the answer
!(needle_use %in% c(3, 4)),
!(needle_frequency %in% c(6, 7))
)
nrow(dat) - nrow(dat_clean)
## [1] 232
We recode the variables so that an increase in number is associated with an increase in whatever the variable measures (e.g., money or agreement). We also recode binary variables so that 0 indicates absence or negation. We also remove those participants who had a NA in any of their answers, because otherwise the proportions in the descriptives table refer to different population sizes. This excludes 13 participants. (Note that there were no NAs in education, income, or med_money; these variables will be used later.)
dat_var <- dat_clean %>%
select(
gender, age, disease, starts_with('tcm'),
starts_with('globuli'), starts_with('vax'),
starts_with('needle'), income, education, med_money
) %>%
drop_na() %>% # removes 13 people
mutate(
# tcm_trust, tcm_science, vax_use, vax_fresh need to be
# reverse coded so that larger means stronger agreement
tcm_trust = 6 - tcm_trust,
tcm_science = 6 - tcm_science,
vax_use = 6 - vax_use,
vax_fresh = 6 - vax_fresh,
# binary indicators need to be recoded so that 1 means yes
disease = 3 - disease,
tcm_known = 3 - tcm_known,
tcm_use = 3 - tcm_use,
globuli_use = 3 - globuli_use,
globuli_pass = 3 - globuli_pass,
needle_use = 3 - needle_use
)
# Recode variables to start at 0, not at 1 (except age)
dat_var <- as_tibble(dat_var - 1) %>% mutate(age = age + 1)
nrow(dat_clean) - nrow(dat_var)
## [1] 13
The table below shows the proportion of men and women who know TCM, have used TCM, have used homeopathy, passed on homeopathy to others, or have used accupuncture in the last three years.
tab_use <- dat_var %>%
group_by(gender) %>%
summarize(
tcm_known_m = mean(tcm_known == 1),
tcm_use_m = mean(tcm_use == 1),
globuli_use_m = mean(globuli_use == 1),
globuli_pass_m = mean(globuli_pass == 1),
needle_use_m = mean(needle_use == 1),
) %>%
mutate_all(
round, 3
) %>%
mutate(
gender = ifelse(gender == 0, 'Female', 'Male')
)
colnames(tab_use) <- c(
'Gender', 'Knows TCM', 'Has used TCM', 'Has used Homeopathy',
'Has passed on Homeopathy', 'Has used Accupuncture'
)
apa_table(
tab_use, caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture use.',
digits = 3
)
| Gender | Knows TCM | Has used TCM | Has used Homeopathy | Has passed on Homeopathy | Has used Accupuncture |
|---|---|---|---|---|---|
| Female | 1.000 | 0.683 | 0.703 | 0.464 | 0.652 |
| Male | 1.000 | 0.470 | 0.429 | 0.211 | 0.432 |
We again present the descriptive statistics using the same survey weighting procedure as above.
# Add 'age_group' and 'education_group' columns
dat_var <- create_age_groups(dat_var)
dat_var <- create_education_groups(dat_var)
# Adjust 'gender' in population joint distribution (0-1 coding instead of 1-2 coding)
pop_joint$gender <- pop_joint$gender - 1
# Create the survey design
design <- svydesign(ids = ~1, data = dat_var, weights = NULL)
ps <- postStratify(design, ~gender + age_group + education_group, pop_joint, partial = TRUE)
# Since we recoded the variables above, we need to adjust this function
# The value '1' now indicates use / knowledge
get_prop <- function(formula, weighting) {
tab <- svytable(formula, weighting, round = TRUE)
if (ncol(tab) == 1) {
row <- (tab / rowSums(tab))[, 1]
} else {
row <- (tab / rowSums(tab))[, 2]
}
round(as.numeric(row), 3)
}
tab_use_weighted <- data.frame(
'gender' = c('Female', 'Male'),
'Knows TCM' = get_prop(~gender + tcm_known, ps),
'Has used TCM' = get_prop(~gender + tcm_use, ps),
'Has used Homeopathy' = get_prop(~gender + globuli_use, ps),
'Has passed on Homeopathy' = get_prop(~gender + globuli_pass, ps),
'Has used Accupuncture' = get_prop(~gender + needle_use, ps)
)
colnames(tab_use_weighted) <- cnames
apa_table(
tab_use_weighted,
caption = 'Table 1. Summarizes TCM, Homeopathy, and Accupuncture using survey weights.',
digits = 3
)
| gender | Knows TCM | Has used TCM | Has used Homeopathy | Has passed on Homeopathy | Has used Accupuncture |
|---|---|---|---|---|---|
| Female | 1.000 | 0.680 | 0.739 | 0.464 | 0.687 |
| Male | 1.000 | 0.509 | 0.436 | 0.236 | 0.492 |
The table below shows how often men and women have used TCM in the last three years.
create_table <- function(dat, var, colname, labels) {
tab <- table(dat$gender, dat[[var]], dnn = c('Gender', colname))
rownames(tab) <- c('Female', 'Male')
colnames(tab) <- labels
# Normalize across columns
tab <- t(apply(tab, 1, function(x) round(x / sum(x), 3)))
tab
}
frequency_labels <- c(
'0', '1 - <5', '5 - <10', '10 - <20', '>= 20'
)
tab_tcm <- create_table(dat_var, 'tcm_frequency', 'Frequency of TCM Use', frequency_labels)
apa_table(
tab_tcm, caption = 'Table 2. Summarizes Frequency of TCM use in the last 3 years.',
digits = 3
)
| 0 | 1 - <5 | 5 - <10 | 10 - <20 | >= 20 | |
|---|---|---|---|---|---|
| Female | 0.411 | 0.262 | 0.117 | 0.087 | 0.123 |
| Male | 0.647 | 0.174 | 0.063 | 0.032 | 0.085 |
We again present the weighted results.
create_weighted_table <- function(formula, weighting, cnames) {
tab <- svytable(formula, weighting, round = TRUE)
tab <- round(tab / rowSums(tab), 3)
rownames(tab) <- c('Female', 'Male')
colnames(tab) <- cnames
class(tab) <- c('matrix', 'array')
tab
}
tab_tcm_weighted <- create_weighted_table(~gender + tcm_frequency, ps, frequency_labels)
apa_table(
tab_tcm_weighted, caption = 'Table 2. Summarizes Frequency of TCM use in the last 3 years using survey weights.',
digits = 3
)
| 0 | 1 - <5 | 5 - <10 | 10 - <20 | >= 20 | |
|---|---|---|---|---|---|
| Female | 0.411 | 0.271 | 0.121 | 0.093 | 0.104 |
| Male | 0.605 | 0.145 | 0.084 | 0.056 | 0.109 |
The table below shows how often men and women have used accupuncture in the last three years.
tab_needle <- create_table(
dat_var, 'needle_frequency', 'Frequency of Accupuncture Use',
frequency_labels
)
apa_table(
tab_needle, caption = 'Table 3. Summarizes Accupuncture use in the last 3 years.',
digits = 3
)
| 0 | 1 - <5 | 5 - <10 | 10 - <20 | >= 20 | |
|---|---|---|---|---|---|
| Female | 0.533 | 0.248 | 0.083 | 0.066 | 0.070 |
| Male | 0.732 | 0.098 | 0.063 | 0.044 | 0.063 |
We again present the weighted results.
tab_needle_weighted <- create_weighted_table(~gender + needle_frequency, ps, frequency_labels)
apa_table(
tab_needle_weighted,
caption = 'Table 3. Summarizes Accupuncture use in the last 3 years using survey weights.',
digits = 3
)
| 0 | 1 - <5 | 5 - <10 | 10 - <20 | >= 20 | |
|---|---|---|---|---|---|
| Female | 0.514 | 0.267 | 0.080 | 0.071 | 0.069 |
| Male | 0.697 | 0.124 | 0.074 | 0.073 | 0.031 |
The table below shows the trust in doctors with TCM training across the genders.
agree_labels <- c(
'Do not Agree', 'Rather Disagree',
'Partly Agree', 'Mostly Agree', 'Completely Agree'
)
tab_trust <- create_table(
dat_var, 'tcm_trust', 'Trust in Doctors with TCM training',
agree_labels
)
apa_table(
tab_trust, caption = 'Table 6. Summarizes Trust in doctors with TCM training.',
digits = 3
)
| Do not Agree | Rather Disagree | Partly Agree | Mostly Agree | Completely Agree | |
|---|---|---|---|---|---|
| Female | 0.048 | 0.040 | 0.102 | 0.187 | 0.623 |
| Male | 0.117 | 0.095 | 0.126 | 0.218 | 0.445 |
We again present the weighted results.
tab_trust_weighted <- create_weighted_table(~gender + tcm_trust, ps, agree_labels)
apa_table(
tab_trust_weighted, caption = 'Table 6. Summarizes Trust in doctors with TCM training using survey weights.',
digits = 3
)
| Do not Agree | Rather Disagree | Partly Agree | Mostly Agree | Completely Agree | |
|---|---|---|---|---|---|
| Female | 0.043 | 0.029 | 0.095 | 0.204 | 0.629 |
| Male | 0.124 | 0.064 | 0.110 | 0.215 | 0.487 |
The table below shows the strength with which people believe / agree that TCM is scientific across the genders.
tab_science <- create_table(
dat_var, 'tcm_science', 'Believes TCM to be scientific',
agree_labels
)
apa_table(
tab_science, caption = 'Table 7. Summarizes the agreement with TCM being scientific.',
digits = 3
)
| Do not Agree | Rather Disagree | Partly Agree | Mostly Agree | Completely Agree | |
|---|---|---|---|---|---|
| Female | 0.089 | 0.090 | 0.193 | 0.248 | 0.380 |
| Male | 0.189 | 0.114 | 0.237 | 0.249 | 0.211 |
We again present the weighted results.
tab_science_weighted <- create_weighted_table(~gender + tcm_science, ps, agree_labels)
apa_table(
tab_science_weighted, caption = 'Table 7. Summarizes the agreement with TCM being scientific using survey weights.',
digits = 3
)
| Do not Agree | Rather Disagree | Partly Agree | Mostly Agree | Completely Agree | |
|---|---|---|---|---|---|
| Female | 0.080 | 0.079 | 0.176 | 0.255 | 0.409 |
| Male | 0.181 | 0.103 | 0.218 | 0.261 | 0.236 |
We remove participants who did not know there medical expenses. This removes 34 participants. The table below shows medical expenses across the genders.
dat_varm <- filter(dat_var, med_money != 7)
nrow(dat_var) - nrow(dat_varm)
## [1] 34
money_labels <- c(
'0', '1 - <100', '100 - <250', '250 - <500',
'500 - <750', '750 - <1000', '>= 1000'
)
tab_med <- create_table(dat_varm, 'med_money', 'Medical Expenses', money_labels)
apa_table(tab_med, caption = 'Table 4. Summarizes Medical expenses in the last 3 years.', digits = 3)
| 0 | 1 - <100 | 100 - <250 | 250 - <500 | 500 - <750 | 750 - <1000 | >= 1000 | |
|---|---|---|---|---|---|---|---|
| Female | 0.053 | 0.129 | 0.131 | 0.188 | 0.149 | 0.100 | 0.251 |
| Male | 0.110 | 0.153 | 0.169 | 0.169 | 0.097 | 0.097 | 0.205 |
We again present the weighted results.
# Create the survey design with new data
design <- svydesign(ids = ~1, data = dat_varm, weights = NULL)
ps <- postStratify(design, ~gender + age_group + education_group, pop_joint, partial = TRUE)
tab_med_weighted <- create_weighted_table(~gender + med_money, ps, money_labels)
apa_table(
tab_med_weighted, caption = 'Table 4. Summarizes Medical expenses in the last 3 years using survey weights.',
digits = 3
)
| 0 | 1 - <100 | 100 - <250 | 250 - <500 | 500 - <750 | 750 - <1000 | >= 1000 | |
|---|---|---|---|---|---|---|---|
| Female | 0.067 | 0.130 | 0.139 | 0.159 | 0.153 | 0.087 | 0.265 |
| Male | 0.114 | 0.157 | 0.167 | 0.202 | 0.079 | 0.103 | 0.178 |
The table below shows TCM expenses across the genders.
tab_tcm <- create_table(dat_var, 'tcm_money', 'TCM Expenses', money_labels)
apa_table(tab_tcm, caption = 'Table 5. Summarizes TCM expenses in the last 3 years.', digits = 3)
| 0 | 1 - <100 | 100 - <250 | 250 - <500 | 500 - <750 | 750 - <1000 | >= 1000 | |
|---|---|---|---|---|---|---|---|
| Female | 0.434 | 0.111 | 0.111 | 0.145 | 0.075 | 0.046 | 0.078 |
| Male | 0.666 | 0.076 | 0.076 | 0.044 | 0.054 | 0.044 | 0.041 |
We again present the weighted results.
tab_tcm_weighted <- create_weighted_table(~gender + tcm_money, ps, money_labels)
apa_table(
tab_tcm_weighted, caption = 'Table 5. Summarizes TCM expenses in the last 3 years using survey weights.',
digits = 3
)
| 0 | 1 - <100 | 100 - <250 | 250 - <500 | 500 - <750 | 750 - <1000 | >= 1000 | |
|---|---|---|---|---|---|---|---|
| Female | 0.425 | 0.099 | 0.128 | 0.132 | 0.075 | 0.054 | 0.088 |
| Male | 0.599 | 0.070 | 0.139 | 0.053 | 0.056 | 0.051 | 0.032 |
Since we analyze data from participants who all know TCM, we remove the variable tcm_known from the dataframe.
table(dat_var$tcm_known) # all participants know TCM
##
## 1
## 1062
dat_stat <- select(dat_var, -tcm_known)
dim(dat_stat)
## [1] 1062 19
This data holds 1062 participants, 156 of which wished to not report their income group.
table(dat_stat$income) # 11 means not reported
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 68 18 63 109 152 151 147 66 45 23 64 156
Similarly, 34 people did not know their medical expenses.
table(dat_stat$med_money) # 7 means not reported
##
## 0 1 2 3 4 5 6 7
## 72 140 146 187 137 102 244 34
We recode these to be NA and will impute these data later.
dat_stat <- dat_stat %>%
mutate(
income = ifelse(income == 11, NA, income), # 11 is NA
med_money = ifelse(med_money == 7, NA, med_money) # 7 is NA
) %>%
# re-order columns for later
dplyr::select(
gender, age, income, education, med_money, disease,
tcm_use, tcm_frequency, tcm_science, tcm_trust, tcm_money,
globuli_use, globuli_pass, vax_use, vax_fresh, needle_use, needle_frequency
)
sum(is.na(dat_stat$income)) # 156 reported no income
## [1] 156
sum(is.na(dat_stat$med_money)) # 34 reported no medical expenses
## [1] 34
The use variables do not add more information than the frequency variables, and so we remove them from subsequent analysis.
with(dat_stat, table(needle_use, needle_frequency))
## needle_frequency
## needle_use 0 1 2 3 4
## 0 438 1 0 0 0
## 1 191 215 82 63 72
with(dat_stat, table(tcm_use, tcm_frequency))
## tcm_frequency
## tcm_use 0 1 2 3 4
## 0 403 1 0 0 0
## 1 108 249 107 75 119
dat_stat <- dplyr::select(dat_stat, -needle_use, -tcm_use)
We use a Bayesian Gaussian copula graphical model to explore the multivariate dependencies in our data. Most of our variables are ordinal and binary, except for age, which we treat as continuous. We use a Gaussian copula graphical model which allows us to model each variable on its proper domain. This is implemented in the R package BGGM (Williams & Mulder, 2020).
apply(dat_stat, 2, function(x) length(unique(x)))
## gender age income education
## 2 70 12 8
## med_money disease tcm_frequency tcm_science
## 8 2 5 5
## tcm_trust tcm_money globuli_use globuli_pass
## 5 7 2 2
## vax_use vax_fresh needle_frequency
## 5 5 5
library('BGGM')
library('qgraph')
library('ggplot2')
library('RColorBrewer')
# Grouped into A, B, C, D, E groups (see below)
node_names <- c(
'Gender', 'Age', 'Income', 'Education', 'Medical expenses', 'Chronic disease',
'TCM usage frequency', 'Perception of scientific support', 'Trust in TCM-certified MDs',
'TCM expenses', 'Homeopathy usage', 'Homeopathy propagation', 'Vaccination usage',
'Booster vaccination', 'Acupuncture usage frequency'
)
plot_graph <- function(mat, errors, node_names, main = NULL, legend = TRUE, ...) {
# Groups: Individual variables, TCM, Homeopathy, Vaccination, Accupuncture
cols <- brewer.pal(5, 'Set3')
groups <- c(
rep('A. Individual variables', 6),
rep('B. TCM', 4),
rep('C. Homeopathy', 2),
rep('D. Vaccination', 2),
rep('E. Acupuncture', 1)
)
qgraph(
# mat, edge.color = ifelse(mat < 0, cols[4], cols[5]),
mat, edge.color = ifelse(mat < 0, 'darkred', 'darkblue'),
pie = errors,
layout = 'circle',
pieColor = 'skyblue',
color = cols,
groups = groups,
nodeNames = node_names,
legend.mode = 'style1',
legend = legend, ...
)
if (!is.null(main)) {
title(main, font.main = 1, line = 2.8, cex.main = 1.8)
}
}
We impute the missing data using mice.
library('mice')
dat_c <- mice(dat_stat, m = 10, printFlag = FALSE)
dat_c <- complete(dat_c)
We present two analyses: one with and one without post-stratification.
We first present the analysis without post-stratification. We visualize the network below. Note that relations between nodes represent partial correlations.
if (!file.exists('fitted-bggm.RDS')) {
m <- estimate(dat_c, iter = 4000, type = 'mixed', analytic = FALSE, impute = FALSE)
pred <- predictability(m, iter = 1000)
saveRDS(list('pred' = pred, 'm' = m), 'fitted-bggm.RDS')
} else {
obj <- readRDS('fitted-bggm.RDS')
m <- obj$m
pred <- obj$pred
}
mat <- m$pcor_mat
errors <- sapply(pred$scores, mean)
# pdf('Figures/Bayesian-pcor.pdf', width = 6, height = 6)
plot_graph(
mat, errors, node_names,
main = 'Partial correlation network', legend.cex = 0.50, legend = F
)
# dev.off()
We visualize the correlation network below.
library('corpcor')
diag(mat) <- 1
mat_cor <- pcor2cor(mat)
# pdf('Figures/Bayesian-cor2.pdf', width = 10, height = 8)
plot_graph(
mat_cor, errors, node_names,
main = 'Correlation network', legend.cex = 0.50, legend = T
)